{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "4f2dc226-adbc-411e-99c9-738112cb2d93",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Looking in indexes: http://mirrors.aliyun.com/pypi/simple\n",
      "Collecting xgboost\n",
      "  Downloading http://mirrors.aliyun.com/pypi/packages/c3/eb/496aa2f5d356af4185f770bc76055307f8d1870e11016b10fd779b21769c/xgboost-2.0.3-py3-none-manylinux2014_x86_64.whl (297.1 MB)\n",
      "\u001b[K     |████████████████████████████████| 297.1 MB 357 kB/s eta 0:00:01     |███████████████████████████▎    | 253.5 MB 386 kB/s eta 0:01:53     |███████████████████████████▍    | 254.1 MB 290 kB/s eta 0:02:28     |████████████████████████████▉   | 267.7 MB 337 kB/s eta 0:01:28\n",
      "\u001b[?25hRequirement already satisfied: numpy in /root/miniconda3/lib/python3.8/site-packages (from xgboost) (1.21.4)\n",
      "Requirement already satisfied: scipy in /root/miniconda3/lib/python3.8/site-packages (from xgboost) (1.10.1)\n",
      "Installing collected packages: xgboost\n",
      "Successfully installed xgboost-2.0.3\n",
      "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\n"
     ]
    }
   ],
   "source": [
    "# !pip install xgboost"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "83f5001c-4f04-466b-b942-ef2d3800f8b5",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "数据预处理完成，准备进行模型训练和测试。\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "from sklearn.preprocessing import StandardScaler, LabelEncoder\n",
    "from sklearn.model_selection import train_test_split\n",
    "import numpy as np\n",
    "\n",
    "# 加载数据\n",
    "data = pd.read_csv('./combined_data.csv')\n",
    "\n",
    "# 删除不需要的列，例如时间戳或IP地址（假设你的数据集中有这些列）\n",
    "data.drop([' Timestamp'], axis=1, inplace=True)\n",
    "\n",
    "# 类型转换，将分类标签编码\n",
    "label_encoder = LabelEncoder()\n",
    "data[' Label'] = label_encoder.fit_transform(data[' Label'])\n",
    "\n",
    "# 检查并处理无穷大和非常大的数值\n",
    "data.replace([np.inf, -np.inf], np.nan, inplace=True)  # 将inf替换为NaN\n",
    "data.fillna(data.median(), inplace=True)  # 使用中位数填充NaN，确保之前中位数计算不包括inf\n",
    "\n",
    "# 特征标准化\n",
    "scaler = StandardScaler()\n",
    "X = scaler.fit_transform(data.drop(' Label', axis=1))  # 确保标签列不参与标准化\n",
    "y = data[' Label']\n",
    "\n",
    "# 划分训练集和测试集\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)\n",
    "\n",
    "print(\"数据预处理完成，准备进行模型训练和测试。\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "c3cb72e1-33a8-4fa6-92ad-eed5ab041fe4",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Logistic Regression Accuracy: 54.96%\n",
      "Random Forest Accuracy: 62.04%\n",
      "SVM Accuracy: 50.17%\n",
      "XGBoost Accuracy: 62.75%\n",
      "\n",
      "Classification Report for XGBoost:\n",
      "              precision    recall  f1-score   support\n",
      "\n",
      "           0       0.99      0.99      0.99       170\n",
      "           1       0.50      0.42      0.45       143\n",
      "           2       0.31      0.25      0.28       174\n",
      "           3       0.56      0.52      0.54       159\n",
      "           4       0.99      0.99      0.99       145\n",
      "           5       0.45      0.42      0.43       146\n",
      "           6       0.60      0.65      0.63       148\n",
      "           7       0.46      0.55      0.50       121\n",
      "           8       0.36      0.46      0.40       144\n",
      "           9       0.54      0.56      0.55       156\n",
      "          10       0.38      0.40      0.39       154\n",
      "          11       0.40      0.44      0.42       146\n",
      "          12       0.99      0.98      0.99       150\n",
      "          13       1.00      0.97      0.99       158\n",
      "          14       0.51      0.49      0.50       130\n",
      "          15       0.92      0.90      0.91       156\n",
      "\n",
      "    accuracy                           0.63      2400\n",
      "   macro avg       0.62      0.62      0.62      2400\n",
      "weighted avg       0.63      0.63      0.63      2400\n",
      "\n"
     ]
    }
   ],
   "source": [
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.ensemble import RandomForestClassifier\n",
    "from sklearn.svm import SVC\n",
    "from xgboost import XGBClassifier\n",
    "from sklearn.metrics import accuracy_score, classification_report\n",
    "\n",
    "# 初始化模型\n",
    "logreg = LogisticRegression(max_iter=1000)\n",
    "rf = RandomForestClassifier(n_estimators=100)\n",
    "svm = SVC()\n",
    "xgb = XGBClassifier(use_label_encoder=False, eval_metric='mlogloss')\n",
    "\n",
    "# 训练逻辑回归模型\n",
    "logreg.fit(X_train, y_train)\n",
    "y_pred_logreg = logreg.predict(X_test)\n",
    "print(\"Logistic Regression Accuracy: {:.2f}%\".format(accuracy_score(y_test, y_pred_logreg) * 100))\n",
    "\n",
    "# 训练随机森林模型\n",
    "rf.fit(X_train, y_train)\n",
    "y_pred_rf = rf.predict(X_test)\n",
    "print(\"Random Forest Accuracy: {:.2f}%\".format(accuracy_score(y_test, y_pred_rf) * 100))\n",
    "\n",
    "# 训练支持向量机模型\n",
    "svm.fit(X_train, y_train)\n",
    "y_pred_svm = svm.predict(X_test)\n",
    "print(\"SVM Accuracy: {:.2f}%\".format(accuracy_score(y_test, y_pred_svm) * 100))\n",
    "\n",
    "# 训练XGBoost模型\n",
    "xgb.fit(X_train, y_train)\n",
    "y_pred_xgb = xgb.predict(X_test)\n",
    "print(\"XGBoost Accuracy: {:.2f}%\".format(accuracy_score(y_test, y_pred_xgb) * 100))\n",
    "\n",
    "# 打印分类报告（以XGBoost为例）\n",
    "print(\"\\nClassification Report for XGBoost:\")\n",
    "print(classification_report(y_test, y_pred_xgb))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d4978d75-3b7d-49ca-93a1-7c84b1e99bc3",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
